\documentclass[a4paper]{article}
\usepackage[affil-it]{authblk}
\usepackage{indentfirst}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{geometry}
\usepackage{xeCJK}
\usepackage{enumitem}
\geometry{margin=1.5cm, vmargin={0pt,1cm}}
\setlength{\topmargin}{-1cm}
\setlength{\paperheight}{29.7cm}
\setlength{\textheight}{25.3cm}

\begin{document}
% ==================================================
\title{Numerical Analysis Homework \#5}

\author{周川迪 Zhou Chuandi 3220101409}
\affil{Qiangji Mathematics Class 2201}

\date{\today}

\maketitle

\begin{abstract}
5.6.1 Theoretical questions from \textit{handoutsNUMPDEs-2024-08-17}.
\end{abstract}

\section*{I}
\textbf{Proof of Theorem 5.7:}

1. \(C[a, b]\) is a linear space over \(\mathbb{C}\), since for all \(u, v, w \in C[a, b]\) and \(\alpha, \beta \in \mathbb{C}\) the following hold:
\begin{enumerate}[label=(\alph*)]
  \item \(\mathbb{C}\) is a field. The additive identity is \(0\), and the multiplicative identity is \(1\).
  \item Closure: \(\alpha u + \beta v \in C[a,b]\), and the operations are well-defined, since sums and scalar multiples of continuous functions are again continuous.
  \item Commutative property: \(u+v = v+u\).
  \item Associative property of addition: \((u+v)+w = u+(v+w)\).
  \item Associative property of multiplication: \((\alpha\beta)u = \alpha(\beta u)\).
  \item Additive identity: \(0(x) \equiv 0 \in C[a, b], \forall u \in C[a, b], u+0 = 0+u = u\).
  \item Additive inverse: There exists \((-u)(t) = -u(t) \in C[a, b]\), such that \((-u)+u = u+(-u) = 0\).
  \item Multiplicative identity: \(1 \cdot u(t) = u(t) \Rightarrow 1u = u\).
  \item Distributive property: \( \alpha(u+v) = \alpha u + \alpha v,\ (\alpha+\beta)u = \alpha u + \beta u \).
\end{enumerate}

2. \(\langle \cdot, \cdot \rangle\) is an inner product, and \(C[a, b]\) is an inner product space over \(\mathbb{C}\):

\(\forall u, v, w \in C[a, b], \alpha \in \mathbb{C}\):
\begin{enumerate}[label=(\alph*)]
  \item Real positivity: \(\langle u, u \rangle = \int_a^b \rho(t) u(t) \overline{u(t)} \, dt = \int_a^b \rho(t) |u(t)|^2 \, dt \geq 0\).
  \item Definiteness: If \(\langle u, u \rangle = \int_a^b \rho(t) |u(t)|^2 \, dt = 0\), then \(u \equiv 0\): otherwise \(|u(t_0)| > 0\) for some \(t_0\), and since \(\rho\) and \(u\) are continuous with \(\rho > 0\), the integrand would be positive on a neighborhood of \(t_0\), making the integral positive.
  \item Linearity: \( \langle \alpha u+v, w \rangle = \int_a^b \rho(t) (\alpha u(t) + v(t)) \overline{w(t)} \, dt = \alpha \int_a^b \rho(t) u(t) \overline{w(t)} \, dt + \int_a^b \rho(t) v(t) \overline{w(t)} \, dt = \alpha \langle u, w \rangle + \langle v, w \rangle\).
  \item Conjugate symmetry: \(\langle v, u \rangle = \int_a^b \rho(t) v(t) \overline{u(t)} \, dt = \int_a^b \overline{\rho(t) u(t) \overline{v(t)}} \, dt = \overline{\langle u, v \rangle}\).
\end{enumerate}

3. \(\|\cdot\|_2\) is a mapping \(C[a, b] \rightarrow \mathbb{R}: \|u\|_2 = \left(\int_a^b \rho(t) |u(t)|^2 \, dt\right)^{\frac{1}{2}} = \sqrt{\langle u, u \rangle}, \forall u \in C[a, b]\).

By \textbf{Definition B.170}, \(\|\cdot\|_2\) is the norm induced by the inner product, and thus it is a norm on \(C[a, b]\).
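As a numerical sanity check of these axioms (a minimal sketch, assuming \texttt{numpy} is available; the weight \(\rho(t) = 1 + t^2\) on \([0,1]\) and the functions \(u, v\) below are arbitrary illustrative choices):

\begin{verbatim}
# Check real positivity, conjugate symmetry and linearity of
# <u, v> = int_a^b rho(t) u(t) conj(v(t)) dt  via the trapezoidal rule.
# (rho, u, v are arbitrary illustrative choices, not from the handout.)
import numpy as np

t = np.linspace(0.0, 1.0, 100001)
rho = 1 + t**2
u = np.exp(1j * t)
v = t**2 + 1j * t

def inner(f, g):
    vals = rho * f * np.conj(g)
    h = t[1] - t[0]
    return h * (vals.sum() - 0.5 * (vals[0] + vals[-1]))

a = 2.0 - 1.0j
print(inner(u, u).real > 0, abs(inner(u, u).imag) < 1e-12)         # positivity
print(np.isclose(inner(v, u), np.conj(inner(u, v))))               # conj. symmetry
print(np.isclose(inner(a*u + v, u), a*inner(u, u) + inner(v, u)))  # linearity
\end{verbatim}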






\section*{II}
Consider the Chebyshev polynomials \(T_n(x) = \cos(n \arccos x)\) for \(n = 0, 1, 2, \ldots\), with the weight \(\rho(x) = \frac{1}{\sqrt{1-x^2}}\) on \([-1, 1]\).

\subsection*{(a)}

\begin{align*}
  \langle T_n, T_m \rangle &= \int_{-1}^1 \rho(t) T_n(t) T_m(t) \, dt = \int_{-1}^1 \frac{1}{\sqrt{1-t^2}} \cos(n \arccos t) \cos(m \arccos t) \, dt \\
  &= -\int_{-1}^1 \cos(n \arccos t) \cos(m \arccos t) \, d\arccos t \\
  &= \int_0^\pi \cos(n\theta) \cos(m\theta) \, d\theta \qquad (\theta = \arccos t) \\
  &= \int_0^\pi \frac{\cos[(n-m)\theta] + \cos[(n+m)\theta]}{2} \, d\theta.
\end{align*}

For integer \(k\), we have\[
\int_0^\pi \cos(k\theta)\,d\theta = \left\{
\begin{array}{ll}
  \pi & (k=0) \\
  \frac{1}{k} \sin(k\theta) \Big|_0^\pi = \frac{\sin(k\pi)}{k} = 0 & (k \neq 0)
\end{array}
\right.
\]

So,\[
\langle T_n, T_m \rangle = \left\{
\begin{array}{ll}
  0 & (n \neq m) \\
  \frac{\pi}{2} & (n = m \neq 0) \\
  \pi & (n = m = 0)
\end{array}
\right.
\]

Thus, \(\{T_n\}\) is an orthogonal system with respect to \(\langle \cdot, \cdot \rangle\).
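This can be cross-checked with Gauss-Chebyshev quadrature (a minimal sketch, assuming \texttt{numpy}; the rule \(\int_{-1}^1 \frac{f(x)}{\sqrt{1-x^2}}\,dx \approx \frac{\pi}{N}\sum_{k=1}^N f(x_k)\) with \(x_k = \cos\frac{(2k-1)\pi}{2N}\) is exact for polynomials of degree \(< 2N\)):

\begin{verbatim}
# Verify <T_n, T_m> for a few (n, m) by Gauss-Chebyshev quadrature.
import numpy as np

N = 64
k = np.arange(1, N + 1)
nodes = np.cos((2 * k - 1) * np.pi / (2 * N))

def T(n, x):
    return np.cos(n * np.arccos(x))        # T_n(x) = cos(n arccos x)

def inner(n, m):
    return np.pi / N * np.sum(T(n, nodes) * T(m, nodes))

print(inner(1, 2))   # ~ 0      (n != m)
print(inner(2, 2))   # ~ pi/2   (n = m != 0)
print(inner(0, 0))   # ~ pi     (n = m = 0)
\end{verbatim}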

\subsection*{(b)}

\(\|T_n\|_2^2 = \langle T_n, T_n \rangle = \left\{
\begin{array}{ll}
  \frac{\pi}{2} & n \neq 0 \\
  \pi & n = 0
  \end{array}
\right.\),

\(T_0(x) = 1, T_1(x) = x, T_2(x) = 2x^2 - 1\), \(\|T_0\|_2=\sqrt{\pi}, \|T_1\|_2=\sqrt{\frac{\pi}{2}}, \|T_2\|_2=\sqrt{\frac{\pi}{2}}\).

So for \(\{T_0, T_1, T_2\}\), the orthonormal basis is \(\left\{\sqrt{\frac{1}{\pi}}, \sqrt{\frac{2}{\pi}} x, \sqrt{\frac{2}{\pi}} (2x^2 - 1)\right\}\).







\section*{III}

Approximate the circular arc given by the equation \( y(x) = \sqrt{1-x^2} \) for \( x \in [-1,1] \).

\subsection*{(a) \( \rho(x) = \frac{1}{\sqrt{1-x^2}} \) with Fourier expansion.}

The classical orthogonal polynomials with respect to the weight \( \rho(x) = \frac{1}{\sqrt{1-x^2}} \) are the Chebyshev polynomials of the first kind.

We have an orthonormal list \((T_0^*,T_1^*,T_2^*)\), where \(T_0^* = \sqrt{\frac{1}{\pi}}, T_1^* = \sqrt{\frac{2}{\pi}} x, T_2^* = \sqrt{\frac{2}{\pi}} (2x^2 - 1)\). So, \begin{align*}
&\langle y, T_0^* \rangle = \langle y, \sqrt{\frac{1}{\pi}} \rangle = \sqrt{\frac{1}{\pi}} \int_{-1}^1 \frac{1}{\sqrt{1-t^2}} \sqrt{1-t^2} \, dt = 2\sqrt{\frac{1}{\pi}}, \\
&\langle y, T_1^* \rangle = \langle y, \sqrt{\frac{2}{\pi}} x \rangle = \sqrt{\frac{2}{\pi}} \int_{-1}^1 \frac{1}{\sqrt{1-t^2}} \sqrt{1-t^2} t \, dt = 0, \\
&\langle y, T_2^* \rangle = \langle y, \sqrt{\frac{2}{\pi}} (2x^2 - 1) \rangle = \sqrt{\frac{2}{\pi}} \int_{-1}^1 \frac{1}{\sqrt{1-t^2}} \sqrt{1-t^2} (2t^2 - 1) \, dt = -\frac{2}{3}\sqrt{\frac{2}{\pi}}.
\end{align*}

Thus, \(\hat{y}(x) = \sum_{n=0}^2\langle y,T_n^* \rangle T_n^*= 2\sqrt{\frac{1}{\pi}} \cdot \sqrt{\frac{1}{\pi}} - \frac{2}{3}\sqrt{\frac{2}{\pi}} \cdot \sqrt{\frac{2}{\pi}} (2x^2 - 1) = \frac{10 - 8x^2}{3\pi}\).
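These coefficients admit a quick cross-check (a minimal sketch, assuming \texttt{numpy} and \texttt{scipy}; since \(\rho(x) y(x) \equiv 1\) on \((-1,1)\), each integrand reduces to \(T_n^*(x)\) itself):

\begin{verbatim}
# Recompute <y, T_n^*> = int_{-1}^1 T_n^*(x) dx and evaluate y_hat.
import numpy as np
from scipy.integrate import quad

Tstar = [lambda x: np.sqrt(1 / np.pi) + 0 * x,
         lambda x: np.sqrt(2 / np.pi) * x,
         lambda x: np.sqrt(2 / np.pi) * (2 * x**2 - 1)]

c = [quad(t, -1, 1)[0] for t in Tstar]
print(c)   # ~ [2/sqrt(pi), 0, -(2/3)*sqrt(2/pi)]

yhat = lambda x: sum(cn * t(x) for cn, t in zip(c, Tstar))
print(yhat(0.3), (10 - 8 * 0.3**2) / (3 * np.pi))   # should agree
\end{verbatim}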




\subsection*{(b) \( \rho(x) = \frac{1}{\sqrt{1-x^2}} \) with normal equations.}

To use the normal equations, we only need a list of linearly independent functions; we take \((1, x, x^2)\).\begin{align*}
\langle 1, 1 \rangle &= \int_{-1}^1 \frac{1}{\sqrt{1-x^2}} \, dx = \arccos x \bigg|_{-1}^1 = \pi, \\
\langle 1, x \rangle &= \int_{-1}^1 \frac{x}{\sqrt{1-x^2}} \, dx = 0, \\
\langle x, x \rangle &= \int_{-1}^1 \frac{x^2}{\sqrt{1-x^2}} \, dx = -\int_0^\pi \frac{\cos^2\theta}{\sin\theta} \, d\cos\theta = \int_0^\pi \cos^2\theta \, d\theta = B\left(\frac{1}{2}, \frac{3}{2}\right) =\frac{1}{2}B\left(\frac{1}{2}, \frac{1}{2}\right) = \frac{\pi}{2}, \\
\langle x, x^2 \rangle &= \int_{-1}^1 \frac{x^3}{\sqrt{1-x^2}} \, dx = 0, \\
\langle x^2, x^2 \rangle &= \int_{-1}^1 \frac{x^4}{\sqrt{1-x^2}} \, dx = -\int_0^\pi \frac{\cos^4\theta}{\sin\theta} \, d\cos\theta = \int_0^\pi \cos^4\theta \, d\theta = B\left(\frac{1}{2}, \frac{5}{2}\right) =\frac{3}{4}B\left(\frac{1}{2}, \frac{3}{2}\right)=\frac{3\pi}{8}.
\end{align*}

(Here \(B(x, y)\) is the beta function, which satisfies \(B(x, y) = \frac{y-1}{x+y-1}B(x, y-1)\) and \(B\left(\frac{1}{2},\frac{1}{2}\right)=\pi\).)

Recalling \( y= \sqrt{1-x^2} \) and \(\rho(x) = \frac{1}{\sqrt{1-x^2}}\), so that \(\rho(x) y(x) \equiv 1\), we have \[
G(1, x, x^2) = \begin{bmatrix}
\langle 1, 1 \rangle & \langle 1, x \rangle & \langle 1, x^2 \rangle \\
\langle x, 1 \rangle & \langle x, x \rangle & \langle x, x^2 \rangle \\
\langle x^2, 1 \rangle & \langle x^2, x \rangle & \langle x^2, x^2 \rangle
\end{bmatrix} = \begin{bmatrix}
\pi & 0 & \frac{\pi}{2} \\
0 & \frac{\pi}{2} & 0 \\
\frac{\pi}{2} & 0 & \frac{3\pi}{8}
\end{bmatrix},\quad \mathbf{c}=\begin{bmatrix}
\langle y, 1 \rangle \\
\langle y, x \rangle \\
\langle y, x^2 \rangle
\end{bmatrix} = \begin{bmatrix}
\int_{-1}^1 dx \\
\int_{-1}^1 x \, dx \\
\int_{-1}^1 x^2 \, dx
\end{bmatrix} = \begin{bmatrix}
2 \\
0 \\
\frac{2}{3}
\end{bmatrix}
\]

\[
G(1, x, x^2)^T\mathbf{a} = \mathbf{c} \Rightarrow \begin{bmatrix}
\pi & 0 & \frac{\pi}{2} \\
0 & \frac{\pi}{2} & 0 \\
\frac{\pi}{2} & 0 & \frac{3\pi}{8}
\end{bmatrix} \begin{bmatrix}
a_0 \\
a_1 \\
a_2
\end{bmatrix} = \begin{bmatrix}
2 \\
0 \\
\frac{2}{3}
\end{bmatrix} \Rightarrow \begin{bmatrix}
a_0 \\
a_1 \\
a_2
\end{bmatrix} = \begin{bmatrix}
\frac{10}{3\pi} \\
0 \\
-\frac{8}{3\pi}
\end{bmatrix}
\]

So, \(\hat{y}(x) = \frac{10 - 8x^2}{3\pi}\).
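The same coefficients come out of a direct numerical solve (a minimal sketch, assuming \texttt{numpy}):

\begin{verbatim}
# Solve the 3x3 normal equations G a = c from above.
import numpy as np

pi = np.pi
G = np.array([[pi,     0.0,    pi / 2],
              [0.0,    pi / 2, 0.0   ],
              [pi / 2, 0.0,    3 * pi / 8]])
c = np.array([2.0, 0.0, 2.0 / 3.0])

a = np.linalg.solve(G, c)            # G is symmetric, so G^T = G
print(a)                             # ~ [10/(3 pi), 0, -8/(3 pi)]
print(10 / (3 * pi), -8 / (3 * pi))  # reference values
\end{verbatim}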





\section*{IV}

\[
\begin{array}{c|cccccccccccc}
x_i & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 \\
\hline
y_i & 256 & 201 & 159 & 61 & 77 & 40 & 17 & 25 & 103 & 156 & 222 & 345 \\
\end{array}
\]

\subsection*{(a)}
In this case we have \(\|u(x)\|^2 = {\langle u(x), u(x) \rangle} = \sum_{i=1}^N \rho(x_i) u(x_i)^2 \) with \(\rho(x) = 1\) and \(N=12\).

\(\Rightarrow \|u(x)\|= \sqrt{\sum_{i=1}^{12} u(x_i)^2}\)

Applying the Gram-Schmidt process to \((1,x,x^2)\), we have \begin{align*}
  u_0(x) &= 1, & \quad u^*_0(x) &= \frac{u_0(x)}{\|u_0(x)\|} = \frac{1}{2\sqrt{3}}, \\
  u_1(x) &= x - \langle x, u^*_0 \rangle u^*_0 = x - \frac{13}{2}, & \quad u^*_1(x) &= \frac{u_1(x)}{\|u_1(x)\|} = \frac{x - \frac{13}{2}}{\sqrt{143}}, \\
  u_2(x) &= x^2 - \langle x^2, u^*_0 \rangle u^*_0 - \langle x^2, u^*_1 \rangle u^*_1 = x^2 - 13x + \frac{91}{3}, & \quad u^*_2(x) &= \frac{u_2(x)}{\|u_2(x)\|} = \frac{x^2 - 13x + \frac{91}{3}}{\sqrt{\frac{4004}{3}}}.
\end{align*}

Thus, we get an orthonormal basis \(\left\{\frac{1}{2\sqrt{3}}, \frac{1}{\sqrt{143}}\left(x - \frac{13}{2}\right), \sqrt{\frac{3}{4004}}\left(x^2 - 13x + \frac{91}{3}\right)\right\}\).
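This basis can be reproduced by a discrete Gram-Schmidt pass over the node values (a minimal check sketch, assuming \texttt{numpy}):

\begin{verbatim}
# Orthonormalize (1, x, x^2) on the nodes x_i = 1, ..., 12.
import numpy as np

x = np.arange(1, 13, dtype=float)
ortho = []
for v in [np.ones_like(x), x.copy(), x**2]:
    for q in ortho:
        v = v - (v @ q) * q          # remove components along earlier q's
    ortho.append(v / np.linalg.norm(v))

u2_star = (x**2 - 13 * x + 91 / 3) / np.sqrt(4004 / 3)
print(np.allclose(ortho[2], u2_star))   # True: matches the closed form
\end{verbatim}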

\subsection*{(b)}

We can calculate (the computation is the same as in \textbf{Example 5.55}):
\[
\begin{bmatrix}
  \langle y, 1 \rangle \\
  \langle y, x \rangle \\
  \langle y, x^2 \rangle 
\end{bmatrix}
=
\begin{bmatrix}
  \sum_{i=1}^{12} y_i \\
  \sum_{i=1}^{12} y_i x_i \\
  \sum_{i=1}^{12} y_i x_i^2
\end{bmatrix}
=
\begin{bmatrix}
  1662  \\
  11392  \\
  109750
\end{bmatrix}
\]

By the Fourier expansion \(\hat{\varphi}(x)=\sum_{n=0}^2\langle y,u_n^* \rangle u_n^*\), we get \begin{align*}
  \langle u_{0}^{*}, y \rangle &= \sum_{i=1}^{12} \frac{y_{i}}{2\sqrt{3}} = \frac{1662}{2\sqrt{3}}=277\sqrt{3}, \\
  \langle u_{1}^{*}, y \rangle &= \sum_{i=1}^{12} \frac{y_{i}(x_{i}-\frac{13}{2})}{\sqrt{143}} = \frac{11392-1662 \times \frac{13}{2}}{\sqrt{143}}=\frac{589}{\sqrt{143}}, \\
  \langle u_{2}^{*}, y \rangle &= \sum_{i=1}^{12} \sqrt{\frac{3}{4004}}\, y_{i}\left(x_{i}^{2}-13x_{i}+\frac{91}{3}\right) = \sqrt{\frac{3}{4004}}\left(109750-13 \times 11392+ \frac{91}{3}\times1662\right) = 12068\sqrt{\frac{3}{4004}}.
  \end{align*}
Thus, \(\hat{\varphi}(x) = \langle u_{0}^{*}, y \rangle u_{0}^{*} + \langle u_{1}^{*}, y \rangle u_{1}^{*} + \langle u_{2}^{*}, y \rangle u_{2}^{*} = \frac{277}{2} + \frac{589}{143}\left(x - \frac{13}{2}\right) + \frac{36204}{4004}\left(x^2 - 13x + \frac{91}{3}\right) = 9.04196x^2 - 113.427x + 386.000\), which is the same as the result in \textbf{Example 5.55}.
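As a cross-check, a library least-squares fit reproduces the same quadratic (a minimal sketch, assuming \texttt{numpy}; \texttt{np.polyfit} solves the corresponding least-squares problem internally):

\begin{verbatim}
# Quadratic least-squares fit to the 12 data points.
import numpy as np

x = np.arange(1, 13, dtype=float)
y = np.array([256, 201, 159, 61, 77, 40, 17, 25, 103, 156, 222, 345],
             dtype=float)

print(np.polyfit(x, y, 2))  # ~ [9.04196, -113.427, 386.0], highest degree first
\end{verbatim}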

\subsection*{(c)} 
Assume there are \( m \) groups of data \(\{y_i\}\), each to be fitted by a polynomial of degree \( p \) at the same \( N \) nodes \(\{x_i\}\).

The orthonormal basis \(\{u_0^*, u_1^*, u_2^*\}\) does not need to be recalculated; however, \(\langle u_0^*, y \rangle\), \(\langle u_1^*, y \rangle\), and \(\langle u_2^*, y \rangle\) need to be recalculated.

If the method of \textbf{orthonormal polynomials} is used, first, it takes \( O(Np^2) \) operations to obtain the orthonormal basis \(\{u_i^*\}_{i=0}^p\) by Gram-Schmidt method. Then, for each set of data, it takes \( O(Np) \) operations to get \(\{\langle u_i^*, y \rangle\}_{i=0}^p\). The total time complexity required is \( O(Np^2 + Nmp) \).

If the method of \textbf{normal equations} is used, first, it takes \( O(Np^2) \) operations to obtain \( G(1, x, \ldots, x^p) \). Then, for different \( y \), it takes \( O(Np) \) operations to get \(\{\langle x_i, y \rangle\}_{i=0}^p\), and finally, it requires \( O(p^3) \) operations to solve the system of equations. The total time complexity required is \( O(Np^2 + m(Np + p^3)) \).

It can be observed that the advantage of the method of \textbf{orthonormal polynomials} is that it avoids solving a linear system for each new group of data, saving the \( O(p^3) \) cost per group that the normal equations incur.

\section*{V}

For \(A \in \mathbb{F}^{m \times n} \), the SVD in \textbf{Definition B.234} is \(V \Sigma U^*\), but I would like to use \(U \Sigma V^*\) instead, where \( U = (u_1, \ldots, u_m) \), \( V = (v_1, \ldots, v_n) \) are unitary, and
\[
\Sigma = \begin{bmatrix}
\sigma_1 & & & 0 \\
& \ddots & & \\
& & \sigma_r & \\
0 & & & 0
\end{bmatrix}_{m \times n}
\quad \Sigma^+ = 
\begin{bmatrix}
\frac{1}{\sigma_1} & & & 0 \\
& \ddots & & \\
& & \frac{1}{\sigma_r} & \\
0 & & & 0
\end{bmatrix}_{n \times m}
\]

The definition of \(A^+\) is:
\[
\forall y \in \mathbb{F}^m, \quad A^+ y = \sum_{j=1}^r \frac{1}{\sigma_j} \langle y, u_j \rangle v_j.
\]

\subsection*{Proof of \textbf{Theorem 5.66}}
\subsubsection*{Lemma: \(A^+ = V \Sigma^+ U^*\)}

\textbf{Proof:} 
\[
\forall i=1,\cdots,m,  \quad A^+ u_i = \sum_{j=1}^r \frac{1}{\sigma_j} \langle u_i, u_j \rangle v_j = \left\{
\begin{array}{ll}
  \frac{1}{\sigma_i}v_i & (i \leq r) \\
  0 & (r < i \leq m)
\end{array}
\right.
\]
\[\Rightarrow A^+U=A^+(u_1,\cdots,u_m)=\left(\frac{1}{\sigma_1}v_1,\cdots,\frac{1}{\sigma_r}v_r,0,\cdots,0\right)=V\Sigma^+\Rightarrow A^+=V\Sigma^+U^*\]

\subsubsection*{(PDI-1)\( AA^+A = A \)}
\textbf{Proof:} \( AA^+ A = (U \Sigma V^*)(V \Sigma^+ U^*)(U \Sigma V^*) = U \Sigma \Sigma^+ \Sigma V^* = U \Sigma V^* = A \), since \(\Sigma \Sigma^+ = \begin{bmatrix} I_r & 0 \\ 0 & 0 \end{bmatrix}_{m \times m}\) and hence \(\Sigma \Sigma^+ \Sigma = \Sigma\).

\subsubsection*{(PDI-2)\( A^+AA^+ = A^+ \)}
\textbf{Proof:} \( A^+ A A^+ = (V \Sigma^+ U^*)(U \Sigma V^*)(V \Sigma^+ U^*) = V \Sigma^+ \Sigma \Sigma^+ U^* = V \Sigma^+ U^* = A^+ \)


\subsubsection*{(PDI-3)\( (AA^+)^* = AA^+ \) and \( (A^+A)^* = A^+A \)}
\textbf{Proof:} \begin{align*}
  (AA^+)^* &= ((U \Sigma V^*)(V \Sigma^+ U^*))^* = (U \Sigma \Sigma^+ U^*)^* = U (\Sigma \Sigma^+)^* U^* = U \Sigma \Sigma^+ U^* = AA^+, \\
  (A^+ A)^* &= ((V \Sigma^+ U^*)(U \Sigma V^*))^* = V (\Sigma^+ \Sigma)^* V^* = V \Sigma^+ \Sigma V^* = A^+ A,
\end{align*}
where \((\Sigma \Sigma^+)^* = \Sigma \Sigma^+\) and \((\Sigma^+ \Sigma)^* = \Sigma^+ \Sigma\) because both are real diagonal matrices.
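The three Penrose identities can also be confirmed numerically (a minimal sketch, assuming \texttt{numpy}):

\begin{verbatim}
# Check PDI-1 through PDI-3 for a random matrix.
import numpy as np

rng = np.random.default_rng(1)
A = rng.standard_normal((5, 3))
Ap = np.linalg.pinv(A)

print(np.allclose(A @ Ap @ A, A))        # PDI-1
print(np.allclose(Ap @ A @ Ap, Ap))      # PDI-2
print(np.allclose((A @ Ap).T, A @ Ap))   # PDI-3 (real case: * is transpose)
print(np.allclose((Ap @ A).T, Ap @ A))   # PDI-3
\end{verbatim}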



\subsection*{Proof of \textbf{Lemma 5.67}}

When \( A \) has full \textbf{column} rank, \( r = n \).
\[
(A^* A)^{-1} A^* = ((U \Sigma V^*)^* (U \Sigma V^*))^{-1} (U \Sigma V^*)^* = (V \Sigma^* \Sigma V^*)^{-1} V \Sigma^* U^*
\]

\[
\Sigma = \begin{bmatrix}
\sigma_1 & & \\
& \ddots & \\
& & \sigma_n \\
& & 0
\end{bmatrix}_{m \times n} \quad
\Sigma^* \Sigma = \begin{bmatrix}
\sigma_1^2 & & \\
& \ddots & \\
& & \sigma_n^2
\end{bmatrix}_{n \times n} \Rightarrow
(\Sigma^* \Sigma)^{-1} \Sigma^* = \Sigma^+.
\]

Thus, \((A^* A)^{-1} A^* = V (\Sigma^* \Sigma)^{-1} V^* V \Sigma^* U^* = V \Sigma^+ U^* = A^+\), which implies \(A^+ A = (A^* A)^{-1} A^* A = I\).

When \( A \) has full \textbf{row} rank, \( r = m \).

\[
A^* (AA^*)^{-1} = (U \Sigma V^*)^* ((U \Sigma V^*)(U \Sigma V^*)^*)^{-1} = V \Sigma^* U^* (U \Sigma \Sigma^* U^*)^{-1},
\]
\[
\Sigma = \begin{bmatrix}
\sigma_1 & & & \\
& \ddots & & \\
& & \sigma_m & 0
\end{bmatrix}_{m \times n} \quad
\Sigma \Sigma^* = \begin{bmatrix}
\sigma_1^2 & & \\
& \ddots & \\
& & \sigma_m^2
\end{bmatrix}_{m \times m}\Rightarrow
\Sigma^* (\Sigma \Sigma^*)^{-1} = \Sigma^+
\]

Thus, \(A^* (AA^*)^{-1} = V \Sigma^* U^* U (\Sigma \Sigma^*)^{-1} U^* = V \Sigma^* (\Sigma \Sigma^*)^{-1} U^* = V \Sigma^+ U^* = A^+\), which implies \( AA^+ = A A^* (AA^*)^{-1} = I \).
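Both full-rank formulas are easy to confirm numerically (a minimal sketch, assuming \texttt{numpy}; a random Gaussian matrix has full rank with probability one):

\begin{verbatim}
# Full column rank: A^+ = (A^* A)^{-1} A^*  and  A^+ A = I.
# Full row rank:    A^+ = A^* (A A^*)^{-1}  and  A A^+ = I.
import numpy as np

rng = np.random.default_rng(2)

A = rng.standard_normal((6, 3))                  # full column rank
left = np.linalg.inv(A.T @ A) @ A.T
print(np.allclose(left, np.linalg.pinv(A)))      # True
print(np.allclose(left @ A, np.eye(3)))          # A^+ A = I

B = rng.standard_normal((3, 6))                  # full row rank
right = B.T @ np.linalg.inv(B @ B.T)
print(np.allclose(right, np.linalg.pinv(B)))     # True
print(np.allclose(B @ right, np.eye(3)))         # B B^+ = I
\end{verbatim}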

\end{document}